Rideshare Data

This analysis relies on Transportation Network Provider (TNP) data for trips, drivers, and vehicles. These datasets are published on the City of Chicago’s Data Portal and updated quarterly.

# Connect to database
con <- RPostgres::dbConnect(RPostgres::Postgres(), 
                            host = config::get("host"),
                            port = config::get("port"),
                            dbname = config::get("dbname"),
                            user = config::get("user"), 
                            password = config::get("password"))

# Load in other data
# trips <- dbGetQuery(con, "SELECT * FROM public.trips LIMIT 100;")
# trips <- dbReadTable(con, "public.trips")

# analysis_trips <- dbGetQuery(con, "SELECT * FROM analysis.base_ytd LIMIT 100;")
# analysis_trips <- dbReadTable(con, "analysis.base_ytd")

# Drivers
# drivers <- dbGetQuery(con, "SELECT * FROM public.drivers LIMIT 100;")
# drivers <- dbReadTable(con, "public.drivers")

# Vehicles
# vehicles <- dbGetQuery(con, "SELECT * FROM public.vehicles LIMIT 100;")
# vehicles <- dbReadTable(con, "public.vehicles")

Trips Over Time

Daily Trips

# Import data
daily_trips <- dbGetQuery(con, "SELECT * FROM analysis.daily_trips")

# Clean and tidy
daily_trips <- daily_trips %>%
  mutate("year" = year(date),
         "day" = yday(date),
         "date_x" = date %>% 
           as.character(.) %>%
           str_replace("^\\d{4}","2000") %>% 
           as_date(.),
         "weekday" = wday(date, label = TRUE),
         "trips" = as.numeric(trips)) 
# Plot information
daily_trips_plot_title <- daily_trips %>%
  group_by(year) %>%
  summarize(trips = sum(trips)) %>%
  pivot_wider(everything(), names_from = "year", values_from = "trips") %>% 
  mutate("change" = `2020`-`2019`,
         "change_pct" = (change/`2019`),
         "title_text" = paste0(as.character(comma(-change)), 
                               " (", percent(change_pct), ")"))

# Daily trips YoY plot
gg_daily_trips <- daily_trips %>% 
  group_by(year) %>% 
  arrange(date_x) %>%
  mutate(trips_agg = cumsum(trips)) %>% 
  ggplot(aes(x = date_x, y = trips_agg, color = as.character(year))) +
  geom_line(size = 1.5) +
  geom_vline(xintercept = as.numeric(as.Date("2000-03-21")), 
             linetype = 4, color = "black", size = 1) +
  scale_x_date(date_breaks = "1 month", 
               date_labels = "%b-%d") +
  scale_y_continuous(labels = comma,
                     limits = c(0, NA)) +
  labs(title = paste0("Compared to the same period in 2019 (1/1-9/30),\nrideshare trips have decreased by ",
                      daily_trips_plot_title$title_text, " in 2020"),
       x = "Day of the Year", y = "Aggregate Trips",
       color = "Year") +
  scale_color_ipsum() +
  theme_ipsum()

# Interactive plot
ggplotly(gg_daily_trips)

Prior to Governor Pritzker’s Stay-at-Home Order going into effect on March 21st, trips were already down by about 7 percent year-over-year. However, the bulk of the decline in rideshare occurred over the spring and summer. From late March to October, only 17 million rideshare trips were taken in Chicago, a 71 percent decline compared to 2019.

# Pre/Post COVID table
daily_trips %>%
  mutate("covid_group" = ifelse(date_x >= "2000-03-21", 
                                "Post-COVID", 
                                "Pre-COVID")) %>% 
  mutate("covid_group" = factor(covid_group, 
                                levels = c("Pre-COVID", 
                                           "Post-COVID"))) %>% 
  group_by(covid_group, year) %>%
  summarize(trips = sum(trips)) %>%
  ungroup() %>% 
  pivot_wider(names_from = "year", 
              values_from = "trips") %>% 
  adorn_totals("row") %>% 
  mutate("change" = `2020`-`2019`,
         "change_pct" = (change/`2019`)) %>% 
  mutate(across(c(`2019`, `2020`, change), comma),
         "change_pct" = percent(change_pct)) %>% 
  select("Period" = covid_group, `2019`, `2020`, 
         "Change" = change, "Change (%)" = change_pct) %>% 
  kbl() %>% 
  row_spec(3, bold = TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", 
                                      "condensed", "responsive"))
Period 2019 2020 Change Change (%)
Pre-COVID 24,413,752 22,610,225 -1,803,527 -7%
Post-COVID 59,148,536 17,161,435 -41,987,101 -71%
Total 83,562,288 39,771,660 -43,790,628 -52%

Monthly Trips

# Import data
monthly_trips <- dbGetQuery(con, "SELECT * FROM analysis.monthly_trips")

# Clean and tidy
monthly_trips <- monthly_trips %>% 
  mutate("year" = year(month),
         "date_x" = floor_date(month, unit = "month") %>% 
           as.character(.) %>%
           str_replace("^\\d{4}","2000") %>% 
           as_date(.),
         "month" = month(month, label = TRUE, abbr = TRUE),
         "trips" = as.numeric(trips)) 

Taking a step back to examine trips per month, rideshare trips reached a low of 1.5 million in April. While trips rebounded in the following months, they have hovered around 3.5 million from July through September.

# Monthly trips YoY plot
gg_monthly_trips <- monthly_trips %>% 
  ggplot(aes(x = date_x, y = trips, fill = as.character(year))) +
  geom_bar(stat = "identity", color = "black",
           position = position_dodge(width = 25), width = 20) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_y_continuous(labels = comma) +
  labs(title = paste0("Trips per Month declined significantly in March,\nbottoming out at 1.5 million in April"),
       x = "Month", y = "Trips",
       fill = "Year") +
  scale_fill_ipsum() +
  theme_ipsum()

# Interactive plot
ggplotly(gg_monthly_trips)

Trip Changes Surrounding 3/31/20 Stay-at-Home Order

It is clear that behavior changed in March even before the Stay-at-Home Order, as trips per day decreased significantly after Saturday, March 14th, 2020, a full week before the Order went into effect.

# Trips per day YoY plot
gg_trips_per_day <- daily_trips %>%
  filter(month(date, label = TRUE) %in% c("Feb", "Mar")) %>% 
  group_by(year) %>% 
  arrange(date_x) %>%
  ggplot(aes(x = date_x, y = trips, color = as.character(year))) +
  geom_line(size = 1.5) +
  geom_vline(xintercept = as.numeric(as.Date("2000-03-21")), 
             linetype = 4, color = "black", size = 1) +
  scale_x_date(date_breaks = "1 week", 
               date_labels = "%b-%d") +
  scale_y_continuous(labels = comma,
                     limits = c(0, NA)) +
  labs(title = paste0("Trips per Day fell before the Stay-at-Home Order went into effect,\n peaking at 360,000 trips on Saturday, March 14th, 2020"),
       x = "Day of the Year", y = "Trips per Day",
       color = "Year") +
  scale_color_ipsum() +
  theme_ipsum()

# Interactive plot
ggplotly(gg_trips_per_day)

Trips by Hour and Day of the Week

# Day of week YoY plot
# Pre/Post COVID table
gg_day_of_week_trips <- daily_trips %>%
  mutate("covid_group" = ifelse(date_x >= "2000-03-21", "Post-COVID", "Pre-COVID")) %>% 
  mutate("covid_group" = factor(covid_group, 
                                levels = c("Pre-COVID", "Post-COVID"))) %>% 
  group_by(covid_group, year, weekday) %>%
  summarize(trips = sum(trips)) %>%
  ungroup() %>%
  ggplot(aes(x = weekday, y = trips, fill = as.character(year))) +
  geom_bar(stat = "identity", color = "black") +
  # scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_y_continuous(labels = comma) +
  labs(title = paste0("Trips per Day of Week, Pre/Post-COVID"),
       x = "Month", y = "Trips",
       fill = "Year") +
  facet_grid(covid_group ~ year) +
  scale_fill_ipsum() +
  theme_ipsum()

# Interactive plot
ggplotly(gg_day_of_week_trips)

Trip Characteristics

# Import data
trip_chars_avg <- dbGetQuery(con, "SELECT * FROM analysis.tnp_trip_chars") %>% 
  mutate("covid_group" = factor(covid_group, 
                                levels = c("Pre-COVID", "Post-COVID")),
         "trips" = as.numeric(trips)) %>% 
  arrange(covid_group, year)

Distance

Average distance of trips in miles:

# Average distance
# Pre/Post COVID table
trip_chars_avg %>%
  select(covid_group, year, trip_miles) %>% 
  pivot_wider(names_from = "year", 
              values_from = "trip_miles") %>% 
  mutate("change" = `2020`-`2019`,
         "change_pct" = (change/`2019`)) %>% 
  mutate(across(c(`2019`, `2020`, change), comma),
         "change_pct" = percent(change_pct)) %>% 
  select("Period" = covid_group, `2019`, `2020`, 
         "Change" = change, "Change (%)" = change_pct) %>% 
  kbl() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", 
                                      "condensed", "responsive"))
Period 2019 2020 Change Change (%)
Pre-COVID 5.86 6.394 0.53 9.0%
Post-COVID 6.36 6.486 0.13 2.1%

Time

Average time of trips in minutes:

# Average time - minutes
# Pre/Post COVID table
trip_chars_avg %>%
  mutate("trip_minutes" = trip_seconds / 60) %>% 
  select(covid_group, year, trip_minutes) %>% 
  pivot_wider(names_from = "year", 
              values_from = "trip_minutes") %>% 
  mutate("change" = `2020`-`2019`,
         "change_pct" = (change/`2019`)) %>% 
  mutate(across(c(`2019`, `2020`, change), comma),
         "change_pct" = percent(change_pct)) %>% 
  select("Period" = covid_group, `2019`, `2020`, 
         "Change" = change, "Change (%)" = change_pct) %>% 
  kbl() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", 
                                      "condensed", "responsive"))
Period 2019 2020 Change Change (%)
Pre-COVID 17.5 16.9 -0.6 -3%
Post-COVID 18.8 15.8 -3.0 -16%

Fares

# Import data
fares <- dbGetQuery(con, "SELECT * FROM analysis.tnp_fares") %>% 
  mutate("covid_group" = factor(covid_group, 
                                levels = c("Pre-COVID", "Post-COVID")),
         "trips" = as.numeric(trips))

# Distribution of fares
fares_pct <- fares %>%
  group_by(covid_group, year) %>% 
  arrange(year, covid_group, fare) %>% 
  mutate("trips_pct" = trips / sum(trips),
         "trips_pct_agg" = cumsum(trips_pct)) %>% 
  ungroup()

# 95% of trips in all periods <$30 dollars
fares_tail <- fares_pct %>% 
  group_by(covid_group, year) %>% 
  filter(trips_pct_agg < 0.95) %>% 
  arrange(desc(fare)) %>% 
  slice(1) %>% 
  ungroup()

# Average fares
# Pre/Post COVID table
trip_chars_avg %>%
  select(covid_group, year, fare) %>% 
  pivot_wider(names_from = "year", 
              values_from = "fare") %>% 
  mutate("change" = `2020`-`2019`,
         "change_pct" = (change/`2019`)) %>% 
  mutate(across(c(`2019`, `2020`, change), dollar),
         "change_pct" = percent(change_pct)) %>% 
  select("Period" = covid_group, `2019`, `2020`, 
         "Change" = change, "Change (%)" = change_pct) %>% 
  kbl() %>% 
  kable_styling(bootstrap_options = c("striped", "hover", 
                                      "condensed", "responsive"))
Period 2019 2020 Change Change (%)
Pre-COVID $10.79 $11.59 $0.80 7.4%
Post-COVID $12.35 $12.22 -$0.12 -1.0%
# Histogram
gg_fares <- fares %>%
  ggplot(aes(x = fare, y = trips, fill = as.character(year))) +
  geom_bar(stat = "identity", color = "black") +
  geom_vline(data = trip_chars_avg, 
             mapping = aes(xintercept = fare),
             linetype = "dashed", size = 1) +
  scale_x_continuous(labels = dollar_format(prefix = "$"),
                     limits = c(0, max(fares_tail$fare))) +
  scale_y_continuous(labels = comma) +
  labs(title = paste0("Fare Distribution, Pre/Post-COVID"),
       subtitle = "95% of trip fares were less than $30 in each period",
       x = "Fare", y = "Trips",
       fill = "Year") +
  facet_grid(covid_group ~ year) +
  scale_fill_ipsum() +
  theme_ipsum()

# Interactive plot
ggplotly(gg_fares)

Spatial Distribution of Trips

Origin

# Import data
origin_tracts_comp <- dbGetQuery(con, "SELECT * FROM analysis.pickup_tracts_comp")
sf_tracts <- st_read(con, layer = "spatial_tracts")

# Join tabular to spatial, reconfigure
origin_tracts_comp <- left_join(sf_tracts %>% select(GEOID),
                                origin_tracts_comp,
                                by = c("GEOID" = "pickup_census_tract")) %>% 
  st_as_sf() %>% 
  mutate("trips" = as.numeric(trips),
         "covid_group" = factor(covid_group, 
                                levels = c("Pre-COVID", "Post-COVID"))) %>% 
  arrange(covid_group, GEOID)

In the Pre-COVID period (1/1-3/21) the spatial distribution of rideshare pickups in 2019 and 2020 were very similar. After the Stay-at-Home Order went into effect, however, trips cratered, particularly those originating on the South, West, and Northwest Sides. Even the core area of high trip activity, roughly comprising the Loop and the North Side, shrunk in size.

# Map
# origin_tracts_comp %>% 
#   ggplot(aes(fill = trips)) +
#   facet_wrap(~year) +
#   geom_sf(color = "light gray") +
#   coord_sf(crs = 4326) +
#   scale_fill_viridis_c(labels = comma) +
#   labs(title = "Rideshare Trips by Origin Census Tract") +
#   theme_ipsum()
  
# Quantile map
tm_shape(origin_tracts_comp) +
  tm_fill(col = "trips", title = "Trips",
          style = "quantile", palette = "viridis") +
  tm_borders() +
  tm_facets(by = c("covid_group", "year"),
            free.coords = FALSE,
            drop.units = TRUE) +
  tm_layout(main.title = "Rideshare Trips by Origin Census Tract", 
            main.title.position = c("center"),
            legend.outside = FALSE, 
            legend.position = c("left", "bottom"),
            legend.width = 1.5,
            scale = 2.75)

Destinations

# Import data
destination_tracts_comp <- dbGetQuery(con, "SELECT * FROM analysis.dropoff_tracts_comp")

# Join tabular to spatial, reconfigure
destination_tracts_comp <- left_join(sf_tracts %>% select(GEOID),
                                     destination_tracts_comp,
                                     by = c("GEOID" = "dropoff_census_tract")) %>% 
  st_as_sf() %>% 
  mutate("trips" = as.numeric(trips),
         "covid_group" = factor(covid_group, 
                                levels = c("Pre-COVID", 
                                           "Post-COVID"))) %>% 
  arrange(covid_group, GEOID)
# Map
# destination_tracts_comp %>% 
#   ggplot(aes(fill = trips)) +
#   facet_wrap(~year) +
#   geom_sf(color = "light gray") +
#   coord_sf(crs = 4326) +
#   scale_fill_viridis_c(labels = comma) +
#   labs(title = "Rideshare Trips by Destination Census Tract") +
#   theme_ipsum()
  
# Quantile map
tm_shape(destination_tracts_comp) +
  tm_fill(col = "trips", title = "Trips",
          style = "quantile", palette = "viridis") +
  tm_borders() +
  tm_facets(by = c("covid_group", "year"),
            free.coords = FALSE,
            drop.units = TRUE) +
  tm_layout(main.title = "Rideshare Trips by Destination Census Tract", 
            main.title.position = c("center"),
            legend.outside = FALSE, 
            legend.position = c("left", "bottom"),
            legend.width = 1.5,
            scale = 2.75)

Origin-Destination (OD) Flows

# Import data
od_pairs_raw <- dbGetQuery(con, "SELECT * FROM analysis.trip_patterns_tracts_comp") %>% 
  mutate("covid_group" = factor(covid_group, 
                                levels = c("Pre-COVID", "Post-COVID")),
         "trips" = as.numeric(trips))

# Intra- vs inter-tract pairs
od_pairs_intra_inter <- od_pairs_raw %>% 
  mutate("intra_inter" = case_when(pickup_census_tract == 
                                     dropoff_census_tract ~
                                     "Intra (Within Same Tract)",
                                   pickup_census_tract != 
                                     dropoff_census_tract ~
                                     "Inter (Different Tract)")) %>% 
  group_by(covid_group, year, intra_inter) %>% 
  summarize("trips" = sum(trips)) %>% 
  ungroup()

# Remove intra-tract trips, make sure in Chicago
# reorganize columns to convert to desire lines
od_pairs_comp <- od_pairs_raw %>% 
  filter(pickup_census_tract != dropoff_census_tract,
         pickup_census_tract %in% sf_tracts$GEOID,
         dropoff_census_tract %in% sf_tracts$GEOID) %>%
  select(pickup_census_tract, dropoff_census_tract,
         everything()) %>% 
  od2line(flow = ., zones = sf_tracts)

# Create a linewidth variable to scale desire lines
od_pairs_comp <- od_pairs_comp %>% 
  mutate("lwd" = trips / mean(od_pairs_comp$trips)) %>% 
  relocate(lwd, .after = trips)

Flow Types

Before analyzing trip flows, it is important to distinguish between those trips that begin and end within the same tract (intra-flows) and trips that begin and end in different tracts (inter-flows). As seen in the table below, very few rideshare trips started and ended in the same tract. About 1.5% of all trips were intra-flows. This was relatively consistent across years and time periods.

# OD Pairs Intra vs Inter
# Pre/Post-COVID table
od_pairs_intra_inter %>% 
  pivot_wider(names_from = c("year", "covid_group"),
              values_from = "trips") %>% 
  adorn_totals(c("row", "col")) %>%
  adorn_percentages("col") %>%
  adorn_pct_formatting() %>%
  kbl(col.names = c("Trip Type", "2019", "2020", 
                    "2019", "2020", "Total")) %>% 
  add_header_above(c(" ", "Pre-COVID" = 2, "Post-COVID" = 2, " ")) %>%
  row_spec(3, bold = TRUE) %>% 
  kable_styling(bootstrap_options = c("striped", "hover", 
                                      "condensed", "responsive"))
Pre-COVID
Post-COVID
Trip Type 2019 2020 2019 2020 Total
Inter (Different Tract) 98.2% 98.4% 98.7% 98.1% 98.5%
Intra (Within Same Tract) 1.8% 1.6% 1.3% 1.9% 1.5%
Total 100.0% 100.0% 100.0% 100.0% 100.0%

OD Flows, 1000+ Trips

# Distribution of OD trip pairs
od_pairs_pct <- od_pairs_comp %>%
  st_drop_geometry() %>% 
  group_by(covid_group, year) %>% 
  arrange(year, covid_group, desc(trips)) %>% 
  mutate("trips_pct" = trips / sum(trips),
         "trips_pct_agg" = cumsum(trips_pct)) %>% 
  ungroup()

# 90% of OD trip pairs in all periods >10 trips
od_pairs_tail <- od_pairs_pct %>% 
  group_by(covid_group, year) %>% 
  filter(trips_pct_agg < 0.90) %>% 
  arrange(trips) %>% 
  slice(1) %>% 
  ungroup()

# OD flows map (all tracts)
tm_shape(sf_tracts) +
  tm_borders() +
  tm_shape(od_pairs_comp %>% 
             filter(trips >= 1000)) +
  tm_lines(col = "trips", title = "Trips", 
           style = "quantile", palette = "viridis",
           lwd = "lwd", title.lwd = "Trips",
           scale = 40, alpha = 0.5,
           legend.lwd.show = FALSE) +
  tm_facets(by = c("covid_group", "year"), 
            free.coords = FALSE,
            drop.units = TRUE) +
  tm_layout(main.title = "Rideshare Trips by Origin-Destination (OD) Pairs\nOD Pairs with 1,000+ Trips", 
            main.title.position = c("center"),
            legend.outside = FALSE, 
            legend.position = c("left", "bottom"),
            legend.width = 1.5,
            scale = 2.75)

# OD flows map (excluding airports - GEOIDs 17031980000, )
tm_shape(sf_tracts) +
  tm_borders() +
  tm_shape(od_pairs_comp %>% 
             filter(trips >= 1000,
                    !pickup_census_tract %in% c("17031980000", 
                                                "17031980100"),
                    !dropoff_census_tract %in% c("17031980000", 
                                                 "17031980100"),)) +
  tm_lines(col = "trips", title = "Trips", 
           style = "quantile", palette = "viridis",
           lwd = "lwd", title.lwd = "Trips",
           scale = 100, alpha = 0.5,
           legend.lwd.show = FALSE) +
  tm_facets(by = c("covid_group", "year"), 
            free.coords = FALSE,
            drop.units = TRUE) +
  tm_layout(main.title = "Rideshare Trips by Origin-Destination (OD) Pairs\nOD Pairs with 1,000+ Trips\nExcluding O'Hare, Midway Airports", 
            main.title.position = c("center"),
            legend.outside = FALSE, 
            legend.position = c("left", "bottom"),
            legend.width = 1.5,
            scale = 2.75)